Skip to content

Conversation

@vikramRH
Copy link
Contributor

@vikramRH vikramRH commented Feb 6, 2025

No description provided.

@llvmbot
Copy link
Member

llvmbot commented Feb 6, 2025

@llvm/pr-subscribers-backend-amdgpu

Author: Vikram Hegde (vikramRH)

Changes

Full diff: https://github.com/llvm/llvm-project/pull/126024.diff

8 Files Affected:

  • (modified) llvm/lib/Target/AMDGPU/AMDGPU.h (+1-1)
  • (modified) llvm/lib/Target/AMDGPU/AMDGPUPassRegistry.def (+1-1)
  • (modified) llvm/lib/Target/AMDGPU/AMDGPUTargetMachine.cpp (+2-1)
  • (modified) llvm/lib/Target/AMDGPU/GCNRewritePartialRegUses.cpp (+66-42)
  • (added) llvm/lib/Target/AMDGPU/GCNRewritePartialRegUses.h (+23)
  • (modified) llvm/test/CodeGen/AMDGPU/rewrite-partial-reg-uses-dbg.mir (+1)
  • (modified) llvm/test/CodeGen/AMDGPU/rewrite-partial-reg-uses-gen.mir (+1)
  • (modified) llvm/test/CodeGen/AMDGPU/rewrite-partial-reg-uses.mir (+1)
diff --git a/llvm/lib/Target/AMDGPU/AMDGPU.h b/llvm/lib/Target/AMDGPU/AMDGPU.h
index 2c6b8828d5cfbab..048c39464be5418 100644
--- a/llvm/lib/Target/AMDGPU/AMDGPU.h
+++ b/llvm/lib/Target/AMDGPU/AMDGPU.h
@@ -460,7 +460,7 @@ extern char &GCNPreRAOptimizationsID;
 FunctionPass *createAMDGPUSetWavePriorityPass();
 void initializeAMDGPUSetWavePriorityPass(PassRegistry &);
 
-void initializeGCNRewritePartialRegUsesPass(llvm::PassRegistry &);
+void initializeGCNRewritePartialRegUsesLegacyPass(llvm::PassRegistry &);
 extern char &GCNRewritePartialRegUsesID;
 
 void initializeAMDGPUWaitSGPRHazardsLegacyPass(PassRegistry &);
diff --git a/llvm/lib/Target/AMDGPU/AMDGPUPassRegistry.def b/llvm/lib/Target/AMDGPU/AMDGPUPassRegistry.def
index 41ad1445f47e927..62716a9d725d90d 100644
--- a/llvm/lib/Target/AMDGPU/AMDGPUPassRegistry.def
+++ b/llvm/lib/Target/AMDGPU/AMDGPUPassRegistry.def
@@ -98,6 +98,7 @@ FUNCTION_PASS_WITH_PARAMS(
 #endif
 MACHINE_FUNCTION_PASS("amdgpu-isel", AMDGPUISelDAGToDAGPass(*this))
 MACHINE_FUNCTION_PASS("amdgpu-pre-ra-long-branch-reg", GCNPreRALongBranchRegPass())
+MACHINE_FUNCTION_PASS("amdgpu-rewrite-partial-reg-uses", GCNRewritePartialRegUsesPass())
 MACHINE_FUNCTION_PASS("gcn-dpp-combine", GCNDPPCombinePass())
 MACHINE_FUNCTION_PASS("si-fix-sgpr-copies", SIFixSGPRCopiesPass())
 MACHINE_FUNCTION_PASS("si-fix-vgpr-copies", SIFixVGPRCopiesPass())
@@ -119,7 +120,6 @@ MACHINE_FUNCTION_PASS("si-wqm", SIWholeQuadModePass())
 DUMMY_MACHINE_FUNCTION_PASS("amdgpu-insert-delay-alu", AMDGPUInsertDelayAluPass())
 DUMMY_MACHINE_FUNCTION_PASS("amdgpu-nsa-reassign", GCNNSAReassignPass())
 DUMMY_MACHINE_FUNCTION_PASS("amdgpu-pre-ra-optimizations", GCNPreRAOptimizationsPass())
-DUMMY_MACHINE_FUNCTION_PASS("amdgpu-rewrite-partial-reg-uses", GCNRewritePartialRegUsesPass())
 DUMMY_MACHINE_FUNCTION_PASS("amdgpu-set-wave-priority", AMDGPUSetWavePriorityPass())
 
 DUMMY_MACHINE_FUNCTION_PASS("si-form-memory-clauses", SIFormMemoryClausesPass())
diff --git a/llvm/lib/Target/AMDGPU/AMDGPUTargetMachine.cpp b/llvm/lib/Target/AMDGPU/AMDGPUTargetMachine.cpp
index fffd30b26dc1d50..dedfcfcb4d5b4f2 100644
--- a/llvm/lib/Target/AMDGPU/AMDGPUTargetMachine.cpp
+++ b/llvm/lib/Target/AMDGPU/AMDGPUTargetMachine.cpp
@@ -33,6 +33,7 @@
 #include "GCNDPPCombine.h"
 #include "GCNIterativeScheduler.h"
 #include "GCNPreRALongBranchReg.h"
+#include "GCNRewritePartialRegUses.h"
 #include "GCNSchedStrategy.h"
 #include "GCNVOPDUtils.h"
 #include "R600.h"
@@ -550,7 +551,7 @@ extern "C" LLVM_EXTERNAL_VISIBILITY void LLVMInitializeAMDGPUTarget() {
   initializeGCNNSAReassignPass(*PR);
   initializeGCNPreRAOptimizationsPass(*PR);
   initializeGCNPreRALongBranchRegLegacyPass(*PR);
-  initializeGCNRewritePartialRegUsesPass(*PR);
+  initializeGCNRewritePartialRegUsesLegacyPass(*PR);
   initializeGCNRegPressurePrinterPass(*PR);
   initializeAMDGPUPreloadKernArgPrologLegacyPass(*PR);
   initializeAMDGPUWaitSGPRHazardsLegacyPass(*PR);
diff --git a/llvm/lib/Target/AMDGPU/GCNRewritePartialRegUses.cpp b/llvm/lib/Target/AMDGPU/GCNRewritePartialRegUses.cpp
index 077ccf36ea4fb28..cccb71d5f8e7287 100644
--- a/llvm/lib/Target/AMDGPU/GCNRewritePartialRegUses.cpp
+++ b/llvm/lib/Target/AMDGPU/GCNRewritePartialRegUses.cpp
@@ -28,6 +28,7 @@
 /// calculation and creates more possibilities for the code unaware of lanemasks
 //===----------------------------------------------------------------------===//
 
+#include "GCNRewritePartialRegUses.h"
 #include "AMDGPU.h"
 #include "MCTargetDesc/AMDGPUMCTargetDesc.h"
 #include "SIRegisterInfo.h"
@@ -44,25 +45,7 @@ using namespace llvm;
 
 namespace {
 
-class GCNRewritePartialRegUses : public MachineFunctionPass {
-public:
-  static char ID;
-  GCNRewritePartialRegUses() : MachineFunctionPass(ID) {}
-
-  StringRef getPassName() const override {
-    return "Rewrite Partial Register Uses";
-  }
-
-  void getAnalysisUsage(AnalysisUsage &AU) const override {
-    AU.setPreservesCFG();
-    AU.addPreserved<LiveIntervalsWrapperPass>();
-    AU.addPreserved<SlotIndexesWrapperPass>();
-    MachineFunctionPass::getAnalysisUsage(AU);
-  }
-
-  bool runOnMachineFunction(MachineFunction &MF) override;
-
-private:
+class GCNRewritePartialRegUsesImpl {
   MachineRegisterInfo *MRI;
   const SIRegisterInfo *TRI;
   const TargetInstrInfo *TII;
@@ -155,13 +138,36 @@ class GCNRewritePartialRegUses : public MachineFunctionPass {
   /// Cache for getAllocatableAndAlignedRegClassMask method:
   ///   AlignNumBits -> Class bitmask.
   mutable SmallDenseMap<unsigned, BitVector> AllocatableAndAlignedRegClassMasks;
+
+public:
+  GCNRewritePartialRegUsesImpl(LiveIntervals *LS) : LIS(LS) {}
+  bool run(MachineFunction &MF);
+};
+
+class GCNRewritePartialRegUsesLegacy : public MachineFunctionPass {
+public:
+  static char ID;
+  GCNRewritePartialRegUsesLegacy() : MachineFunctionPass(ID) {}
+
+  StringRef getPassName() const override {
+    return "Rewrite Partial Register Uses";
+  }
+
+  void getAnalysisUsage(AnalysisUsage &AU) const override {
+    AU.setPreservesCFG();
+    AU.addPreserved<LiveIntervalsWrapperPass>();
+    AU.addPreserved<SlotIndexesWrapperPass>();
+    MachineFunctionPass::getAnalysisUsage(AU);
+  }
+
+  bool runOnMachineFunction(MachineFunction &MF) override;
 };
 
 } // end anonymous namespace
 
 // TODO: move this to the tablegen and use binary search by Offset.
-unsigned GCNRewritePartialRegUses::getSubReg(unsigned Offset,
-                                             unsigned Size) const {
+unsigned GCNRewritePartialRegUsesImpl::getSubReg(unsigned Offset,
+                                                 unsigned Size) const {
   const auto [I, Inserted] = SubRegs.try_emplace({Offset, Size}, 0);
   if (Inserted) {
     for (unsigned Idx = 1, E = TRI->getNumSubRegIndices(); Idx < E; ++Idx) {
@@ -175,15 +181,14 @@ unsigned GCNRewritePartialRegUses::getSubReg(unsigned Offset,
   return I->second;
 }
 
-unsigned GCNRewritePartialRegUses::shiftSubReg(unsigned SubReg,
-                                               unsigned RShift) const {
+unsigned GCNRewritePartialRegUsesImpl::shiftSubReg(unsigned SubReg,
+                                                   unsigned RShift) const {
   unsigned Offset = TRI->getSubRegIdxOffset(SubReg) - RShift;
   return getSubReg(Offset, TRI->getSubRegIdxSize(SubReg));
 }
 
-const uint32_t *
-GCNRewritePartialRegUses::getSuperRegClassMask(const TargetRegisterClass *RC,
-                                               unsigned SubRegIdx) const {
+const uint32_t *GCNRewritePartialRegUsesImpl::getSuperRegClassMask(
+    const TargetRegisterClass *RC, unsigned SubRegIdx) const {
   const auto [I, Inserted] =
       SuperRegMasks.try_emplace({RC, SubRegIdx}, nullptr);
   if (Inserted) {
@@ -197,7 +202,8 @@ GCNRewritePartialRegUses::getSuperRegClassMask(const TargetRegisterClass *RC,
   return I->second;
 }
 
-const BitVector &GCNRewritePartialRegUses::getAllocatableAndAlignedRegClassMask(
+const BitVector &
+GCNRewritePartialRegUsesImpl::getAllocatableAndAlignedRegClassMask(
     unsigned AlignNumBits) const {
   const auto [I, Inserted] =
       AllocatableAndAlignedRegClassMasks.try_emplace(AlignNumBits);
@@ -214,7 +220,7 @@ const BitVector &GCNRewritePartialRegUses::getAllocatableAndAlignedRegClassMask(
 }
 
 const TargetRegisterClass *
-GCNRewritePartialRegUses::getRegClassWithShiftedSubregs(
+GCNRewritePartialRegUsesImpl::getRegClassWithShiftedSubregs(
     const TargetRegisterClass *RC, unsigned RShift, unsigned RegNumBits,
     unsigned CoverSubregIdx, SubRegMap &SubRegs) const {
 
@@ -289,8 +295,8 @@ GCNRewritePartialRegUses::getRegClassWithShiftedSubregs(
 }
 
 const TargetRegisterClass *
-GCNRewritePartialRegUses::getMinSizeReg(const TargetRegisterClass *RC,
-                                        SubRegMap &SubRegs) const {
+GCNRewritePartialRegUsesImpl::getMinSizeReg(const TargetRegisterClass *RC,
+                                            SubRegMap &SubRegs) const {
   unsigned CoverSubreg = AMDGPU::NoSubRegister;
   unsigned Offset = std::numeric_limits<unsigned>::max();
   unsigned End = 0;
@@ -343,9 +349,8 @@ GCNRewritePartialRegUses::getMinSizeReg(const TargetRegisterClass *RC,
 
 // Only the subrange's lanemasks of the original interval need to be modified.
 // Subrange for a covering subreg becomes the main range.
-void GCNRewritePartialRegUses::updateLiveIntervals(Register OldReg,
-                                                   Register NewReg,
-                                                   SubRegMap &SubRegs) const {
+void GCNRewritePartialRegUsesImpl::updateLiveIntervals(
+    Register OldReg, Register NewReg, SubRegMap &SubRegs) const {
   if (!LIS->hasInterval(OldReg))
     return;
 
@@ -400,13 +405,13 @@ void GCNRewritePartialRegUses::updateLiveIntervals(Register OldReg,
 }
 
 const TargetRegisterClass *
-GCNRewritePartialRegUses::getOperandRegClass(MachineOperand &MO) const {
+GCNRewritePartialRegUsesImpl::getOperandRegClass(MachineOperand &MO) const {
   MachineInstr *MI = MO.getParent();
   return TII->getRegClass(TII->get(MI->getOpcode()), MI->getOperandNo(&MO), TRI,
                           *MI->getParent()->getParent());
 }
 
-bool GCNRewritePartialRegUses::rewriteReg(Register Reg) const {
+bool GCNRewritePartialRegUsesImpl::rewriteReg(Register Reg) const {
   auto Range = MRI->reg_nodbg_operands(Reg);
   if (Range.empty() || any_of(Range, [](MachineOperand &MO) {
         return MO.getSubReg() == AMDGPU::NoSubRegister; // Whole reg used. [1]
@@ -476,12 +481,10 @@ bool GCNRewritePartialRegUses::rewriteReg(Register Reg) const {
   return true;
 }
 
-bool GCNRewritePartialRegUses::runOnMachineFunction(MachineFunction &MF) {
+bool GCNRewritePartialRegUsesImpl::run(MachineFunction &MF) {
   MRI = &MF.getRegInfo();
   TRI = static_cast<const SIRegisterInfo *>(MRI->getTargetRegisterInfo());
   TII = MF.getSubtarget().getInstrInfo();
-  auto *LISWrapper = getAnalysisIfAvailable<LiveIntervalsWrapperPass>();
-  LIS = LISWrapper ? &LISWrapper->getLIS() : nullptr;
   bool Changed = false;
   for (size_t I = 0, E = MRI->getNumVirtRegs(); I < E; ++I) {
     Changed |= rewriteReg(Register::index2VirtReg(I));
@@ -489,11 +492,32 @@ bool GCNRewritePartialRegUses::runOnMachineFunction(MachineFunction &MF) {
   return Changed;
 }
 
-char GCNRewritePartialRegUses::ID;
+bool GCNRewritePartialRegUsesLegacy::runOnMachineFunction(MachineFunction &MF) {
+  auto *LISWrapper = getAnalysisIfAvailable<LiveIntervalsWrapperPass>();
+  auto LIS = LISWrapper ? &LISWrapper->getLIS() : nullptr;
+  GCNRewritePartialRegUsesImpl Impl(LIS);
+  return Impl.run(MF);
+}
+
+PreservedAnalyses
+GCNRewritePartialRegUsesPass::run(MachineFunction &MF,
+                                  MachineFunctionAnalysisManager &MFAM) {
+  auto *LIS = MFAM.getCachedResult<LiveIntervalsAnalysis>(MF);
+  if (!GCNRewritePartialRegUsesImpl(LIS).run(MF))
+    return PreservedAnalyses::all();
+
+  auto PA = getMachineFunctionPassPreservedAnalyses();
+  PA.preserveSet<CFGAnalyses>();
+  PA.preserve<LiveIntervalsAnalysis>();
+  PA.preserve<SlotIndexesAnalysis>();
+  return PA;
+}
+
+char GCNRewritePartialRegUsesLegacy::ID;
 
-char &llvm::GCNRewritePartialRegUsesID = GCNRewritePartialRegUses::ID;
+char &llvm::GCNRewritePartialRegUsesID = GCNRewritePartialRegUsesLegacy::ID;
 
-INITIALIZE_PASS_BEGIN(GCNRewritePartialRegUses, DEBUG_TYPE,
+INITIALIZE_PASS_BEGIN(GCNRewritePartialRegUsesLegacy, DEBUG_TYPE,
                       "Rewrite Partial Register Uses", false, false)
-INITIALIZE_PASS_END(GCNRewritePartialRegUses, DEBUG_TYPE,
+INITIALIZE_PASS_END(GCNRewritePartialRegUsesLegacy, DEBUG_TYPE,
                     "Rewrite Partial Register Uses", false, false)
diff --git a/llvm/lib/Target/AMDGPU/GCNRewritePartialRegUses.h b/llvm/lib/Target/AMDGPU/GCNRewritePartialRegUses.h
new file mode 100644
index 000000000000000..b2c3190b5c6ba0f
--- /dev/null
+++ b/llvm/lib/Target/AMDGPU/GCNRewritePartialRegUses.h
@@ -0,0 +1,23 @@
+//===- GCNRewritePartialRegUses.h -------------------------------*- C++ -*-===//
+//
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
+//
+//===----------------------------------------------------------------------===//
+
+#ifndef LLVM_LIB_TARGET_AMDGPU_GCNREWRITEPARTIALREGUSES_H
+#define LLVM_LIB_TARGET_AMDGPU_GCNREWRITEPARTIALREGUSES_H
+
+#include "llvm/CodeGen/MachinePassManager.h"
+
+namespace llvm {
+class GCNRewritePartialRegUsesPass
+    : public PassInfoMixin<GCNRewritePartialRegUsesPass> {
+public:
+  PreservedAnalyses run(MachineFunction &MF,
+                        MachineFunctionAnalysisManager &MFAM);
+};
+} // namespace llvm
+
+#endif // LLVM_LIB_TARGET_AMDGPU_GCNREWRITEPARTIALREGUSES_H
diff --git a/llvm/test/CodeGen/AMDGPU/rewrite-partial-reg-uses-dbg.mir b/llvm/test/CodeGen/AMDGPU/rewrite-partial-reg-uses-dbg.mir
index 85d0c054754d03d..ede043ce73a47fb 100644
--- a/llvm/test/CodeGen/AMDGPU/rewrite-partial-reg-uses-dbg.mir
+++ b/llvm/test/CodeGen/AMDGPU/rewrite-partial-reg-uses-dbg.mir
@@ -1,5 +1,6 @@
 # NOTE: Assertions have been autogenerated by utils/update_mir_test_checks.py UTC_ARGS: --version 2
 # RUN: llc -mtriple=amdgcn-amd-amdhsa -amdgpu-enable-rewrite-partial-reg-uses=true -verify-machineinstrs -start-before=rename-independent-subregs -stop-after=rewrite-partial-reg-uses %s -o - | FileCheck -check-prefix=CHECK %s
+# RUN: llc -mtriple=amdgcn-amd-amdhsa -passes="rename-independent-subregs,amdgpu-rewrite-partial-reg-uses" %s -o - | FileCheck -check-prefix=CHECK %s
 --- |
   define void @test_vreg_96_w64() !dbg !5 {
   entry:
diff --git a/llvm/test/CodeGen/AMDGPU/rewrite-partial-reg-uses-gen.mir b/llvm/test/CodeGen/AMDGPU/rewrite-partial-reg-uses-gen.mir
index 037f39df8c3e06e..79e9ce27376950a 100644
--- a/llvm/test/CodeGen/AMDGPU/rewrite-partial-reg-uses-gen.mir
+++ b/llvm/test/CodeGen/AMDGPU/rewrite-partial-reg-uses-gen.mir
@@ -1,5 +1,6 @@
 # NOTE: Assertions have been autogenerated by utils/update_mir_test_checks.py
 # RUN: llc -mtriple=amdgcn-amd-amdhsa -amdgpu-enable-rewrite-partial-reg-uses=true -verify-machineinstrs -start-before=rename-independent-subregs -stop-after=rewrite-partial-reg-uses %s -o - | FileCheck -check-prefix=CHECK %s
+# RUN: llc -mtriple=amdgcn-amd-amdhsa -passes="rename-independent-subregs,amdgpu-rewrite-partial-reg-uses" %s -o - | FileCheck -check-prefix=CHECK %s
 ---
 name: test_subregs_composition_vreg_1024
 tracksRegLiveness: true
diff --git a/llvm/test/CodeGen/AMDGPU/rewrite-partial-reg-uses.mir b/llvm/test/CodeGen/AMDGPU/rewrite-partial-reg-uses.mir
index 07e49dcdafd8cc3..33007ee8a7c38d6 100644
--- a/llvm/test/CodeGen/AMDGPU/rewrite-partial-reg-uses.mir
+++ b/llvm/test/CodeGen/AMDGPU/rewrite-partial-reg-uses.mir
@@ -1,5 +1,6 @@
 # NOTE: Assertions have been autogenerated by utils/update_mir_test_checks.py
 # RUN: llc -mtriple=amdgcn-amd-amdhsa -amdgpu-enable-rewrite-partial-reg-uses=true -verify-machineinstrs -start-before=rename-independent-subregs -stop-after=rewrite-partial-reg-uses %s -o - | FileCheck -check-prefix=CHECK %s
+# RUN: llc -mtriple=amdgcn-amd-amdhsa -passes="rename-independent-subregs,amdgpu-rewrite-partial-reg-uses" %s -o - | FileCheck -check-prefix=CHECK %s
 ---
 name: test_subregs_composition_vreg_1024
 tracksRegLiveness: true

@github-actions
Copy link

github-actions bot commented Feb 11, 2025

✅ With the latest revision this PR passed the C/C++ code formatter.

#include "GCNIterativeScheduler.h"
#include "GCNPreRALongBranchReg.h"
#include "GCNPreRAOptimizations.h"
#include "GCNRewritePartialRegUses.h"
Copy link
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

This `#include "GCNRewritePartialRegUses.h"` seems to be unused in this file.

Copy link
Contributor Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

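It is required: AMDGPUPassRegistry.def is included in this file, and its new MACHINE_FUNCTION_PASS("amdgpu-rewrite-partial-reg-uses", GCNRewritePartialRegUsesPass()) entry needs the GCNRewritePartialRegUsesPass declaration from this header.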

Copy link
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Oh yes

@vikramRH vikramRH merged commit 9c725ef into llvm:main Feb 12, 2025
8 checks passed
flovent pushed a commit to flovent/llvm-project that referenced this pull request Feb 13, 2025
joaosaffran pushed a commit to joaosaffran/llvm-project that referenced this pull request Feb 14, 2025
sivan-shani pushed a commit to sivan-shani/llvm-project that referenced this pull request Feb 24, 2025
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment

Projects

None yet

Development

Successfully merging this pull request may close these issues.

5 participants